clear
clc
close all
addpath("functions\")
addpath("D:\2 - Data availability\2 - Macroregions");

%% Load colours
% Each palette file is read as a numeric array and scaled by 1/255
% (presumably 8-bit RGB triplets, one colour per row - confirm against the
% palette files) so the rows can be used directly as MATLAB colour specs.
red = importdata("red_palette.txt") / 255;
violet = importdata("violet_palette.txt") / 255;

%% INPUT DATA

% Zone (shapefile base name) to analyse. Names listed by the author:
%   Europe:        "clipped_eu_italia_official", "clipped_eu_veneto", "clipped_official_europa"
%   North America: "clipped_na_states", "clipped_eastcoast"
%   World:         "world"
opts.zones = "clipped_eu_italia_official";

% Label shown in the figure legends for the selected zone:
%   Europe: "VNT", "ITA", "EUR"   North America: "USA"   World: "WORLD"
opts.zoneLabels = "ITA";

% Surface area of the selected zone, in [km2]:
%   ITALIA:     301000
%   EAST COAST: 7.06*10^5
%   WORLD:      1.33*10^8
areaZones = 301000;

% Remaining options handed to dataLoading / calcPdfs / fitPowerLaws
opts.colors = [0 0 0] / 255;
opts.shapefile = "%ZONE%.shp";
opts.inLakePercentLimit = 0.5;
opts.zoneClasses = 200;

% Intervals
opts.zoneLimits = [10^-2 10^7];     % [km2]
opts.pLawInterval = [10^2 10^5];


%% Data loading, calculation of the pdfs and of the power laws

data = dataLoading(opts);
%%
pdfs = calcPdfs(data, opts);
pLaw = fitPowerLaws(pdfs, opts);

% Stretch counts: every stretch, and those whose class equals 1
% (the "wet" ones, going by the variable name)
id = data.class == 1;
Nstretches_tot = length(data.class);
Nstretches_wet = nnz(id);

%% Calculation of total, perennial and temporary length

% Cumulative lengths (linear scale) as returned by calcPdfs
L  = pdfs.LtAcc;    % total cumulative length
Lw = pdfs.LwAcc;    % wet cumulative length
Ld = pdfs.LdAcc;    % dry cumulative length

% Geomorphic drainage network length, using a drainage density Dd = 5 km^-1
Lnet = 5 * areaZones;

% Tested threshold areas and how many of them there are
Astar = pdfs.Astar;
int = length(Astar);


%% CALIBRATION OF A and B
% Fits the power law L = a * Astar^b by least squares in log-log space on
% every window (i:k) of the retained points that contains at least
% n_points samples, keeps the fits with R^2 >= 0.95 and picks the (a, b)
% pair with the highest kernel-density likelihood.

% Removal of the finite size effect tail
id = Astar >= 10^2;             % ONLY A* >= 100 km2
L_test = L(id);
Astar_test = Astar(id);
h_test = length(L_test);
h = h_test;

Rsquare_result = NaN(h, h);     % R^2 of the fit on window (i:k)
A = NaN(h, h);                  % coefficient a of the power law
B = NaN(h, h);                  % exponent b of the power law
N = NaN(h, h);                  % difference between k and i

percent_points = 0.4;
n_points = round(h * percent_points);   % min number of points per window

% NOTE(review): the outer loop stops at h_test - n_points, although a
% window of n_points samples could still start at h_test - n_points + 1.
% Left unchanged because it alters the set of accepted fits - confirm.
for i = 1 : h_test - n_points
    for k = (i + n_points - 1) : h_test

        N(i, k) = k - i;

        X = log(Astar_test((i:k), 1));
        Y = log(L_test((i:k), 1));

        % Ordinary least squares in log space
        b = cov(X, Y);
        b = b(1, 2) / b(1, 1);      % Sxy / Sxx
        a = exp(mean(Y) - b * mean(X));
        A(i, k) = a;
        B(i, k) = b;

        % Coefficient of determination of the log-log regression
        Yreg = log(a) + b * X;
        RSS = sum((Y - Yreg).^2);
        TSS = sum((Y - mean(Y)).^2);
        Rsquare = 1 - RSS / TSS;

        Rsquare_result(i, k) = Rsquare;
    end
end

% Area at which each fitted law reaches Lnet
Amin_good = (Lnet ./ A) .^ (1 ./ B);   % B is negative

% Selection of a threshold to remove non-fitted data
threshold = Rsquare_result >= 0.95;
A_good = A(threshold);
B_good = B(threshold);
B_good = abs(B_good);                  % from here on b is stored as a positive number

if isempty(A_good)
    error("No power-law fit reached R^2 >= 0.95: a and b cannot be calibrated.");
end

% Kernel density of each parameter evaluated AT the accepted samples.
% (The second argument of ksdensity is the list of evaluation points; the
% previous code passed the sample count there, which produced a single
% density value and therefore always selected the first accepted fit.)
pA = ksdensity(A_good, A_good);
pB = ksdensity(B_good, B_good);
likelihood = (pA .* pB)';

[~, i] = max(likelihood);
A_best = A_good(i);
B_best = B_good(i);

% Log-spaced area grid used to draw the fitted laws
Abig = 10.^(linspace(log10(10^-4), log10(10^7), length(L)))';

% One column per accepted fit: y(:, j) = A_good(j) * x.^(-B_good(j))
x = Abig;
y = A_good(:).' .* x .^ (-B_good(:).');    % B is positive number

% Find the best curve that interpolates the points
y_best = A_best .* x .^ (-B_best);         % on the plotting grid
y_baby = A_best .* Astar .^ (-B_best);     % on the data abscissae

interval = abs(L - y_baby) < 0.1 * y_baby;    % points within 10% of the interpolation line

RSS = sum((L(interval) - y_baby(interval)).^2);
TSS = sum((L(interval) - mean(L(interval))).^2);
RsquareL = 1 - RSS / TSS;


%% PLOT OF THE LENGTHS
% Log-log plot of the cumulative length against contributing area, with a
% subset of the accepted interpolation lines drawn in the background.
% NOTE(review): `color` is not defined in this script; it is presumably
% supplied by something on the MATLAB path - verify.

fig = figure();

    % Invisible entry used only to show the zone label in the legend
    legendEntry = plot(NaN, NaN, '.', 'Color', 'none', 'DisplayName', opts.zoneLabels);
    hold on
    % yline(Lnet, '-', 'LineWidth',2,'Color',color.black.rgb, 'DisplayName','L_{net}')

    % Every 50th interpolation line, hidden from the legend ...
    plot(x, y(:, 1:50:end), '-', 'LineWidth', 2, 'Color', color.light_grey.rgb, 'HandleVisibility', 'off');
    % ... plus a single dummy line that carries their legend entry
    legendEntry = plot(NaN, NaN, '-', 'LineWidth', 2, 'Color', color.light_grey.rgb, ...
        'DisplayName', "Interpolation lines");

    % Every third data point
    plot(Astar(1:3:end), L(1:3:end), 'o', 'MarkerSize', 4, 'MarkerFaceColor', violet(3,:), ...
        'MarkerEdgeColor', violet(4, :), 'LineWidth', 0.1, 'DisplayName', "Data");
    % plot(Astar(interval), L(interval), '.', 'MarkerSize', 30, 'Color', color.red.rgb, 'DisplayName', "Best fitting data")
    % plot(x, y_best, '-', 'LineWidth', 5,'Color', color.red.rgb, 'DisplayName', "Best fitting line: two steps");

    ax = gca;
    ax.XScale = 'log';
    ax.YScale = 'log';
    fig.Units = "centimeters";
    fig.Position = [3 3 9 6];
    ax.ClippingStyle = 'rectangle';
    box on

    xlabel('Contributing area (km^2)', 'FontSize', 8);
    ylabel('Total Length (km)', 'FontSize', 8);
    leg = legend('Location', 'northeast', 'Orientation', 'vertical');
    leg.FontSize = 8;
    leg.ItemTokenSize = [10, 10]; % [length, height]
    leg.Box = 'off';
    xlim([10^-3, 10^7]);
    ylim([10^0, 10^13]);

    ax.FontSize = 8;
    ax.TickLength = [0.01, 0.01];
    ax.YTick = [10^0, 10^13];

%% CDF LENGTH

% AREA
% Order the stretches by area and accumulate their lengths, normalised by Lnet
[area_sort, sortIdx] = sort(data.area);
length_sort_a = data.length(sortIdx);
cdf_area = cumsum(length_sort_a) / Lnet;    % value < threshold
ccdf_area = 1 - cdf_area;                   % value > threshold

% First sorted area that reaches 10 and the complementary CDF at that point
id = find(area_sort >= 10, 1);
AREA10 = area_sort(id);
FRAC10 = ccdf_area(id);


%% C and D CALIBRATION with all GOOD values of A and B
% Monte Carlo search: each trial pairs one accepted (a, b) fit, drawn at
% random, with a uniformly sampled (c, d) and scores the resulting
% two-step model of PHI with R^2 on the window [20 km2, Athr].

% Definition of phi and limit value Amax where phi == 0
PHI = pdfs.PdAcc;
threshold = find(PHI <= 0.01, 1);
if isempty(threshold)
    error("PHI never drops to 0.01 or below: the upper calibration limit Athr cannot be defined.");
end
Athr = Astar(threshold);

idL = find(Astar >= 20, 1);     % cut at A = 20 km2
idM = find(Astar >= Athr, 1);
Astar_cut = Astar((idL:idM), 1);
PHI_cut = PHI((idL:idM), 1);

% Definitions of the ranges of variations of the parameters
% (MONTE CARLO METHOD)

Cmin = 0.01;
Cmax = max(A_good);
Dmin = 0.01;
Dmax = max(B_good);

N = 10^7;     % # values to test with Monte Carlo method

% Only c and d are sampled here. a and b are drawn inside the loop from the
% accepted fits, so the two N-by-1 uniform samples of a and b that used to
% be generated at this point (and were never read) have been dropped.
c = Cmin + (Cmax - Cmin) * rand(N, 1);
d = Dmin + (Dmax - Dmin) * rand(N, 1);

% PROCEDURE APPLICATION

% Columns: A, B, C, D, AMIN, ATHRESHOLD, K, GAMMA, R^2
PARAM_TWOSTEPS = NaN(N, 9);

% Loop-invariant quantities
SS_tot = sum((PHI_cut - mean(PHI_cut)).^2);
nGood = length(A_good);

for i = 1 : N   % rows of the PARAM_TWOSTEPS matrix

    j = randi(nGood, 1);
    A = A_good(j);
    B = B_good(j);

    C = c(i);
    D = d(i);

    ATHRESHOLD = (C / A) .^ (1 / (D - B));   % area where the modelled PHI reaches 0
    AMIN = (Lnet / A) .^ (-1 / B);           % area where A * x^(-B) equals Lnet

    K = C / A;
    GAMMA = B - D;

    % Two-step model: 1 - K * A^GAMMA before ATHRESHOLD, 0 from there on
    id = Astar_cut < ATHRESHOLD;
    PHIresult = zeros(numel(Astar_cut), 1);
    PHIresult(id) = 1 - K .* Astar_cut(id) .^ (GAMMA);

    % Calculation of R^2
    SS_res = sum((PHI_cut - PHIresult).^2);
    Rsquare_result = 1 - (SS_res / SS_tot);

    % Saving the variables
    PARAM_TWOSTEPS(i, :) = [A, B, C, D, AMIN, ATHRESHOLD, K, GAMMA, Rsquare_result];

end

PARAM_TWOSTEPS(any(isnan(PARAM_TWOSTEPS), 2), :) = [];        % delete any NaN entries

% Unpack the Monte Carlo results, one column vector per quantity
A_3 = PARAM_TWOSTEPS(:, 1);
B_3 = PARAM_TWOSTEPS(:, 2);
C_3 = PARAM_TWOSTEPS(:, 3);
D_3 = PARAM_TWOSTEPS(:, 4);
AMIN_3 = PARAM_TWOSTEPS(:, 5);
ATHRESHOLD_3 = PARAM_TWOSTEPS(:, 6);
K_3 = PARAM_TWOSTEPS(:, 7);
GAMMA_3 = PARAM_TWOSTEPS(:, 8);
R2_3 = PARAM_TWOSTEPS(:, 9);

% Selection of the best values: R^2 within 5% of the best one and Amin < 10
threshold = R2_3 >= 0.95 * max(R2_3) & AMIN_3 < 10; %(0.95*Athr <= ATHRESHOLD_3) & (ATHRESHOLD_3 <= 1.05*Athr);
if ~any(threshold)
    error("No Monte Carlo sample satisfies R2 >= 0.95*max(R2) together with Amin < 10.");
end

A_3_int = A_3(threshold);
B_3_int = B_3(threshold);
C_3_int = C_3(threshold);
D_3_int = D_3(threshold);
K_3_int = K_3(threshold);
GAMMA_3_int = GAMMA_3(threshold);
AMIN_3_int = AMIN_3(threshold);
ATHRESHOLD_3_int = ATHRESHOLD_3(threshold);
R2_3_int = R2_3(threshold);

% Finding the best combination.
% The index output of max is always a single position (the first one on
% ties), so every *_3 value below is guaranteed to be a scalar, which the
% curve and table code further down relies on.
[~, id] = max(R2_3_int);
A_3 = A_3_int(id);
B_3 = B_3_int(id);
C_3 = C_3_int(id);
D_3 = D_3_int(id);
K_3 = K_3_int(id);
GAMMA_3 = GAMMA_3_int(id);
AMIN_3 = AMIN_3_int(id);
ATHRESHOLD_3 = ATHRESHOLD_3_int(id);

% -----------------------------------------------------------------------%
% Model curves. Every curve equals 1 - K * A^GAMMA below its threshold
% area and 0 from the threshold on. Result arrays are preallocated with
% the size of the grid they are evaluated on (Astar or Abig).
nInt = numel(ATHRESHOLD_3_int);

% LOCAL SCALE (on Astar)
% Best case 3
id = Astar < ATHRESHOLD_3;
curve_3_baby = zeros(numel(Astar), 1);
curve_3_baby(id) = 1 - K_3 .* Astar(id) .^ (GAMMA_3);

% Interval 1: one column per selected parameter set
curve_3_int_baby = zeros(numel(Astar), nInt);
for i = 1 : nInt
    id = Astar < ATHRESHOLD_3_int(i);
    curve_3_int_baby(id, i) = 1 - K_3_int(i) .* Astar(id) .^ GAMMA_3_int(i);
end

% -----------------------------------------------------------------------%
% REGIONAL SCALE (on Abig)
% Best case 1
id = Abig < ATHRESHOLD_3;
curve_3 = zeros(numel(Abig), 1);
curve_3(id) = 1 - K_3 .* Abig(id) .^ (GAMMA_3);

% curve_botter is kept as a row vector, as in the original script
curve_botter = NaN(1, numel(Abig));
curve_botter(id) = (1 - K_3 .* AMIN_3 .^ (GAMMA_3) .* ...
        (1 - (Abig(id) ./ AMIN_3) .^ (-D_3)) ./ (1 - (Abig(id) ./ AMIN_3) .^ (-B_3)));
id = ~id;
curve_botter(id) = (1 - K_3 .* AMIN_3 .^ (GAMMA_3)) ./ (1 - (Abig(id) ./ AMIN_3) .^ (-B_3));

% Interval 1: one column per selected parameter set, sized on Abig
% (was sized with length(Astar) although it is filled on the Abig grid)
curve_3_int_big = zeros(numel(Abig), nInt);
for i = 1 : nInt
    id = Abig < ATHRESHOLD_3_int(i);
    curve_3_int_big(id, i) = 1 - K_3_int(i) .* Abig(id) .^ GAMMA_3_int(i);
end

% -----------------------------------------------------------------------%
% Addition of the interpolation line with the best parameters
y_3 = A_3 .* Abig .^ (-B_3);
y_3_baby = A_3 .* Astar .^ (-B_3);

% Sized on Abig as well (was sized with the number of Astar values)
y_3_int = NaN(numel(Abig), length(A_3_int));
for i = 1 : length(A_3_int)
    y_3_int(:, i) = A_3_int(i) .* Abig .^ (-B_3_int(i));
end

% R^2 of the best line over the data points lying within 10% of it
interval = abs(L - y_3_baby) < 0.1 * y_3_baby;

RSS = sum((L(interval) - y_3_baby(interval)).^2);
TSS = sum((L(interval) - mean(L(interval))).^2);
RsquareL_3 = 1 - RSS / TSS;

%% LIKELIHOOD

% Likelihood of a and b: kernel densities evaluated at the samples themselves
pA = ksdensity(A_good, A_good);
pB = ksdensity(B_good, B_good);

likelihood_ab = (pA .* pB)';

% Likelihood of c and d
pC = ksdensity(C_3_int, C_3_int);
pD = ksdensity(D_3_int, D_3_int);

likelihood_cd = (pC .* pD)';

% Plot of HISTOGRAMS + LIKELIHOOD
% One panel per coefficient; the four panels only differ in the data and
% in the letter shown in the labels, so they are produced in a loop.
% NOTE(review): `color` is not defined in this script; it is presumably
% supplied by something on the MATLAB path - verify.
figure()

    set(gcf, "Units", "centimeters");
    set(gcf, 'Position', [3 3 18 5]);

    panelData = {A_good, B_good, C_3_int, D_3_int};
    panelName = ["a", "b", "c", "d"];

    for p = 1 : numel(panelData)
        values = panelData{p};
        name = panelName(p);

        subplot(1, 4, p)
        histogram(values, 10, "Normalization", "pdf", 'DisplayName', "Histogram of " + name);
        hold on;

        % Kernel density estimate on ksdensity's default grid
        [dens, densX] = ksdensity(values);
        plot(densX, dens, "LineWidth", 2, 'Color', color.red.rgb, "DisplayName", "Pdf(" + name + ")");

        xlabel("Coefficient " + name, 'FontSize', 8);
        ylabel("Frequency", 'FontSize', 8);
        leg = legend('Location', 'southoutside', 'Orientation', 'horizontal');
        leg.FontSize = 8;
        leg.ItemTokenSize = [8, 8]; % [length, height]
        leg.Box = 'off';
        ax = gca;
        ax.FontSize = 8;
        ax.TickLength = [0.01, 0.01];
        xlim([0.9 * min(values), 1.1 * max(values)]);
    end

%% INFORMATIVE CONTENT:
% calculation of how informative PhiC and PhiN are at different spatial scales

% Restrict both curves to the areas from 0.85 * AMIN_3 upwards
id = find(Abig >= 0.85 * AMIN_3, 1);
Area_int = Abig(id:end);
curve_minor = curve_botter(id:end);     % PhiC
curve_major = curve_3(id:end);          % PhiN

% IC(i) = (PhiC(end) - PhiN(i)) / (PhiC(i) - PhiN(i)), as a column vector.
% (It used to be preallocated with NaN(n), i.e. an n-by-n matrix of which
% only the first column was filled, so the plots below drew n lines.)
IC = (curve_minor(end) - curve_major(:)) ./ (curve_minor(:) - curve_major(:));

% First area at which IC reaches 0.9
id = find(IC >= 0.9, 1);
ic90 = IC(id);
area90 = Area_int(id);

% IC at the first grid area not smaller than the zone area
id = find(Area_int >= areaZones, 1);
icCAT = IC(id);
areaCAT = Area_int(id);

IC_COMPL = 1 - IC;

% NOTE(review): `color` is not defined in this script; it is presumably
% supplied by something on the MATLAB path - verify.
figure()

    % Invisible entry used only to show the zone label in the legend
    legendEntry = plot(NaN, NaN, '.', 'Color', 'none', 'DisplayName', opts.zoneLabels);
    hold on
    plot(Area_int, IC, '-', 'LineWidth', 3, 'Color', color.black.rgb, 'HandleVisibility', 'off');
    plot(Area_int, IC_COMPL, '-', 'LineWidth', 3, 'Color', color.grey.rgb, 'HandleVisibility', 'off');
    % Dummy lines carrying the legend entries of the two curves
    legendEntry1 = plot(NaN, NaN, '-', 'LineWidth', 3, 'Color', color.black.rgb, 'DisplayName', 'ic' );
    legendEntry2 = plot(NaN, NaN, '-', 'LineWidth', 3, 'Color', color.grey.rgb, 'DisplayName', '1 - ic' );
    plot(areaCAT, icCAT, '*', 'MarkerSize', 8, 'LineWidth', 2, 'DisplayName', 'ic(A_{max})=IC')
    set(gca, 'XScale', 'log');
    xlim([AMIN_3, 10^7])
    ylim([0, 1])
    set(gcf, 'Position', [50 50 800 300]);

    xlabel('Contributing area (km^2)', 'FontSize', 25);
    ylabel('\alpha', 'FontSize', 25);
    leg = legend('Location', 'east', 'Orientation', 'vertical');
    leg.FontSize = 15;
    leg.ItemTokenSize = [20, 20]; % [length, height]
    leg.Box = 'off';
    ax = gca;
    ax.FontSize = 15;
    ax.TickLength = [0.01, 0.01];


%% Plot of the TEMPORARY FRACTION

% Thin out the plotted PHI points: keep one sample every three
Tf = PHI(1:3:end);
Area = Astar(1:3:end);

    fig = figure();
    fig.Renderer = 'OpenGL';

    % Curves of all the selected parameter sets, hidden from the legend
    plot(Abig, curve_3_int_big, '-', 'Color', [0.9 0.9 0.9], 'LineWidth', 1.5, 'HandleVisibility', 'off');
    hold on
    % Dummy entries: zone label and the legend line for the curves above
    legendEntry = plot(NaN, NaN, '.', 'Color', 'none', 'DisplayName', opts.zoneLabels);
    legendEntry1 = plot(NaN, NaN, '-', 'Color', [0.9 0.9 0.9], 'LineWidth', 5, 'DisplayName', 'Confidence interval');
    % Best model curve
    plot(Abig, curve_3, '-', 'Color', red(5, :), 'LineWidth', 2, 'DisplayName', "Model (Eq. 2)");
    % Thinned data points
    plot(Area, Tf, 'o', 'MarkerSize', 5, 'MarkerEdgeColor', violet(6, :), 'MarkerFaceColor', violet(5,:), 'LineWidth', 0.8, ...
         'DisplayName', 'GCRFM');

    ax = gca;
    ax.XScale = 'log';
    fig.Units = "centimeters";
    fig.Position = [3 3 9 5];
    fig.Renderer = 'painters';

    xlabel('Contributing area (km^2)', 'FontSize', 8);
    ylabel('Temporary fraction', 'FontSize', 8);
    leg = legend('Location', 'northeast', 'Orientation', 'vertical');
    leg.FontSize = 6;
    leg.ItemTokenSize = [8, 8]; % [length, height]
    leg.Box = 'off';

    ax.FontSize = 5;
    ax.TickLength = [0.01, 0.01];
    ax.XTick = [10^-2 10^-1 10^0 10^1 10^2 10^3 10^4 10^5 10^6 10^7];

    xlim([AMIN_3, 10^7]);
    ylim([0, 1]);



%% ERRORS
% Goodness-of-fit of the best model curve (curve_3_baby) against PHI.

% Standard deviation, across the selected parameter sets, of the model
% value at the first point of the Abig grid
dev = std(curve_3_int_big(1, :));

% Root Mean Square Error
RMSE_3 = rmse(PHI, curve_3_baby);

% Nash-Sutcliffe Efficiency: 1 - sum((O - S).^2) / sum((O - mean(O)).^2).
% The denominator previously squared mean(PHI) alone instead of the
% deviation (PHI - mean(PHI)), so it was not a sum of squares.
NSE_3 = 1 - sum((PHI - curve_3_baby).^2) / sum((PHI - mean(PHI)).^2);

% Kling-Gupta Efficiency
% S = simulated, O = observed
% parameters:
% r = sigma_SO / (sigma_S * sigma_O)
% alpha = sigma_S / sigma_O
% beta = mu_S / mu_O

matrix = cov(curve_3_baby, PHI);
r = matrix(1, 2) / (std(curve_3_baby) * std(PHI));
Alpha = std(curve_3_baby) / std(PHI);
Beta = mean(curve_3_baby) / mean(PHI);

KGE_3 = 1 - sqrt((r - 1)^2 + (Alpha - 1)^2 + (Beta - 1)^2);

% R^2 on C and D, restricted to Astar >= 10
id = Astar >= 10;
RSS = sum((PHI(id) - curve_3_baby(id)).^2);
TSS = sum((PHI(id) - mean(PHI(id))).^2);
RsquarePHI_3 = 1 - (RSS / TSS);

%% Table with the best results

% Model values of every selected parameter set at the first Abig point
% that is not smaller than 0.9 * AMIN_3
id = find(Abig >= 0.9 * AMIN_3, 1);
asympValues = curve_3_int_big(id, :);

columnNames = {'Amin', 'low Athr', 'Athr', 'max Athr', ...
    'a', 'b', 'c', 'd', 'K', 'gamma', ...
    'min asymp', 'F2', 'max asymp', 'DevSt', 'R2_L', 'RMSE', 'NSE', 'KGE', 'R2_PHI'};

% Single-row summary of the calibration
TAB3 = table( ...
    AMIN_3, min(ATHRESHOLD_3_int), ATHRESHOLD_3, max(ATHRESHOLD_3_int), ...
    A_3, B_3, C_3, D_3, K_3, GAMMA_3, ...
    min(asympValues), curve_3(id), max(asympValues), dev, ...
    RsquareL_3, RMSE_3, NSE_3, KGE_3, RsquarePHI_3, ...
    'VariableNames', columnNames);

% Save the results

% save(strrep("%ZONES%", "%ZONES%", opts.zones), "Astar","AMIN_3", "ATHRESHOLD_3", ...
%     "A_3", "B_3", "C_3", "D_3", "K_3", "GAMMA_3", "L", "Lw", "Ld", ...
%     "PHI", "curve_3", "curve_3_int_big", "curve_botter", ...
%     "ic90", "area90", "icCAT", "areaCAT");

